### Replication Package for "Why is Intermediating Houses so Difficult? Evidence from iBuyers"
### Buchak, Matvos, Piskorski, and Seru
###
###
### buchak@stanford.edu

### Creates extra moments used in the calibration.

library(data.table)
library(ggplot2)
library(zoo)
library(Hmisc)
library(stringr)
library(fixest)

source('0_helper_functions.r')


# Compute the additional data moments used in the calibration.
#
# Takes no arguments. Reads the transaction data through loadData(), which is
# not defined in this file (presumably it comes from 0_helper_functions.r).
# felm() is likewise not attached by the library() calls at the top of this
# file -- NOTE(review): presumably the helper file attaches `lfe`; confirm.
#
# Returns a named list of scalars:
#   share        mean of `iBuyer` over 2018 sales below $1M
#   price        median sale amount over the same 2018 sample
#   xs_vol       sd of log-price residuals after characteristics x quarter x
#                fipscode cell fixed effects (2013-2018, sales below $1M)
#   ts_vol       sd of log-price residuals after property fixed effects plus
#                characteristics cell fixed effects (same sample)
#   dsde         2018 coefficient of iBuyer.buyer on the predicted absolute
#                pricing error
#   dsdl         2018 coefficient of iBuyer.buyer on the predicted liquidity
#                index
#   unprofitable fraction of 2018 iBuyer purchases whose next recorded sale
#                was below the purchase price
get_additional_moments <- function() {

  data.in <- loadData()

  # ---- Level moments: 2018 sales below $1M --------------------------------
  sales.2018 <- data.in[year == 2018 & saleamount < 1000000]

  # iBuyer share
  MOMENT_share <- mean(sales.2018$iBuyer)

  # House price (2018)
  MOMENT_house.price <- median(sales.2018$saleamount, na.rm = TRUE)

  # ---- Price dispersion moments -------------------------------------------
  # Sample regression data
  sample.data <- data.in[year %in% 2013:2018 & saleamount < 1000000]

  # XS house price variation: residual dispersion within a
  # characteristics x quarter x fipscode cell. (`water` appears twice in the
  # interaction; repeating a term does not change the cells.)
  xs.regression <- feols(log(saleamount) ~ 1 | storiesnum^sewer^water^condo^airconditioning^heating^garage^quality^roofcover^locationinfluence^water^house.age^size.bin^qtr^fipscode, data = sample.data)
  MOMENT_house.price.sd.XS <- sd(xs.regression$residuals)

  # TS house price variation: residual dispersion after a property fixed
  # effect and a characteristics-cell fixed effect.
  ts.regression <- feols(log(saleamount) ~ 1 | pclidirisfrmtd + storiesnum^sewer^water^condo^airconditioning^heating^garage^quality^roofcover^locationinfluence^water^house.age^size.bin^multistory, data = sample.data)
  MOMENT_house.price.sd.TS <- sd(ts.regression$residuals)

  # ---- Sensitivity of iBuyer share to pricing error and liquidity ---------
  # Variation in share and errors (like Table 3 but somewhat tighter model)
  ## Filters (some of these fixed effects lack observations --> problematic in model estimation)
  data.analysis <- data.in[saleamount < 1000000 & land_sqft < 300000 & living_sqft < 10000 & !(heating %in% c('SP0','ST0','RDH')) & quality != 'QFA' & !(garage %in% c('920','280','360','410','810','660')) & airconditioning != 'ASP']

  # Previous sale price of the same property (NA for a property's first sale)
  data.analysis <- data.analysis[order(pclidirisfrmtd, date)]
  data.analysis[, last.sale := shift(saleamount, 1, type = 'lag'), by = 'pclidirisfrmtd']

  # Partition into training and testing. Training data is every sale before
  # 2013, excluding iBuyer purchases (although there shouldn't be any).
  train.columns <- c('age.bin','size.bin','multistory','zip5','qtr','saleamount','airconditioning','garage','heating','quality')
  data.train <- data.analysis[year < 2013 & iBuyer.buyer == 0, train.columns, with = FALSE]
  data.train <- data.train[complete.cases(data.train)]

  # Test data is 2014 onward; the share regression below restricts it to 2018.
  data.test <- data.analysis[year >= 2014]

  # Hedonic pricing model with quarter x zip fixed effects
  pricing.model <- felm(log(saleamount) ~ age.bin + size.bin + multistory + airconditioning + garage + heating + quality | paste0(qtr,zip5), data = data.train)

  # Absolute pricing errors are what we forecast. felm stores residuals as a
  # one-column matrix, so flatten before storing them as a column.
  data.train[, e_raw := abs(as.numeric(pricing.model$residuals))]

  # Exclude extremely large errors (likely data errors)
  data.reg <- data.train[e_raw < 3]

  # Model to predict error terms from house characteristics
  deviation.model <- lm(e_raw ~ age.bin + size.bin + multistory + airconditioning + garage + heating + quality, data = data.reg)

  # Get predicted errors in test data.
  data.test[, predicted.e2 := predict(deviation.model, data.test)]

  # Build liquidity prediction model: cubic polynomials in the logs of last
  # sale price, lot size, living area and house age.
  data.test[, `:=`(
    mr1 = log(last.sale),   mr2 = log(last.sale)^2,   mr3 = log(last.sale)^3,
    ls1 = log(land_sqft),   ls2 = log(land_sqft)^2,   ls3 = log(land_sqft)^3,
    is1 = log(living_sqft), is2 = log(living_sqft)^2, is3 = log(living_sqft)^3,
    ha1 = log(house.age),   ha2 = log(house.age)^2,   ha3 = log(house.age)^3
  )]

  # These coefficients come from the MLS regressions of whether a listing
  # sells within three months (MLS 2013-2017 data), done separately on the
  # NBER data server, hence the manual coding.
  data.test[, prediction := (-7.458e1
                             - 4.288e-1 * mr1 + 6.143e-2 * mr2 - 2.344e-3 * mr3
                             + 1.184e-1 * ls1 + 8.913e-3 * ls2 - 1.688e-3 * ls3
                             + 2.921e1  * is1 - 3.779e0  * is2 + 1.621e-1 * is3
                             + 1.182e-2 * ha1 + 2.430e-2 * ha2 - 5.951e-3 * ha3
                             - 1.147e-1 * multistory)]

  # Regress the iBuyer purchase indicator on both predictions within
  # zip x quarter, clustering by property, using 2018 sales only.
  share.model <- felm(iBuyer.buyer ~ predicted.e2 + prediction|paste(zip5,qtr)|0|pclidirisfrmtd, data = data.test[year == 2018])
  MOMENT_dS_de <- share.model$coefficients['predicted.e2','iBuyer.buyer']
  MOMENT_dS_dl <- share.model$coefficients['prediction','iBuyer.buyer']

  # ---- Fraction with unprofitable sales (negative PNL) --------------------
  # Pair each purchase with the next recorded sale of the same property.
  # `date` breaks ties inside a quarter so that lead() picks the
  # chronologically next sale even when a property trades twice in one quarter.
  resales <- data.in[order(pclidirisfrmtd, qtr, date)]
  resales[, buy.price  := saleamount]
  resales[, sale.price := shift(saleamount, 1, type = 'lead'), by = 'pclidirisfrmtd']
  resales[, sale.date  := shift(date, 1, type = 'lead'), by = 'pclidirisfrmtd']

  # Keep 2018 purchases below $1M that were resold on a different date.
  completed <- resales[!is.na(sale.price) & sale.date != date & year == 2018 & saleamount < 1000000]
  completed[, pnl := sale.price / buy.price - 1]
  completed <- completed[pnl != 0] # get rid of zero pnl observations

  MOMENT_share.unprofitable <- mean(completed[iBuyer.buyer == 1]$pnl < 0)

  list(
    share        = MOMENT_share,
    price        = MOMENT_house.price,
    xs_vol       = MOMENT_house.price.sd.XS,
    ts_vol       = MOMENT_house.price.sd.TS,
    dsde         = MOMENT_dS_de,
    dsdl         = MOMENT_dS_dl,
    unprofitable = MOMENT_share.unprofitable
  )

}

# Compute the moments and write them out as a one-row table with one column
# per moment. write.table() defaults apply, so the file is space-delimited
# with a quoted header even though it carries a .csv extension.
moments <- get_additional_moments()
write.table(
  x         = as.data.table(moments),
  file      = '../out/tables/ADDITIONAL_MOMENTS.csv',
  row.names = FALSE,
  col.names = TRUE
)
